Preprocessing

# Packages
#library("CyTOF") # Need to adjust name
library("devtools")
library(ruv)
library(rsvd)
load_all() # Load my package
library("cytofkit")
library("gridExtra")
library("ggpubr")
library("tidyverse")
library("FlowSOM")
library("RColorBrewer")
# Takes list of plots, character vector of titles
add_titles <- function(plots, titles){
  map2(plots, titles, function(x, y) x + labs(titles = y))
}
titles <- c("Raw", "Finck", "RUVIII")
setwd("/home/users/allstaff/pullin.j/CyTOF-RUVIII/Data")
# NB: the first file must be the raw one
files <- c("raw_data.rds", "norm_data.rds")
data_list <- map(files, ~readRDS(.))
samples <- c("1B1", "2B1", "3B1", "4B1", "5B1", "6B1")

data_list[[1]] <- data_list[[1]] %>%
  filter(sample %in% samples) %>% 
  mutate(ind = 1:nrow(.)) %>% 
  group_by(sample) %>% 
  sample_n(10000) %>% 
  ungroup() %>% 
  as.data.frame()

index <- as.vector(as.matrix(select(data_list[[1]], ind)))
data_list[[1]] <- select(data_list[[1]], -ind)

data_list[[2]] <- data_list[[2]] %>%
  filter(sample %in% samples) %>% 
  slice(index) %>% 
  as.data.frame()
# This will be written into a wrapper
raw_data <- data_list[[1]]

norm_clusters <- c(33, 30, 28)
M <- make_M(raw_data$cluster, norm_clusters)

raw_Y <- as.matrix(raw_data[3:ncol(raw_data)])

# Standardise the input and then compensate output
col_means <- colMeans(raw_Y)
col_sds <- apply(raw_Y, 2, function(x) sd(x))

for(i in 1:ncol(raw_Y)){
  raw_Y[,i] <- (raw_Y[,i] - col_means[i])/col_sds[i]
}

k <- 1
norm_Y <- fastRUVIII(Y = raw_Y, M, ctl = c(1:ncol(raw_Y)), eta = 1, k = k)$newY

for(i in 1:ncol(norm_Y)){
  norm_Y[,i] <- norm_Y[,i]*col_sds[i] + col_means[i]
}

ruv_data <- data.frame(raw_data[, 1:2], norm_Y)

# Recluster the data
ruv_clusters <- cluster_FlowSOM(data = ruv_data, k = 40, seed = 42)
ruv_data$cluster <- ruv_clusters

# Add it back to the data list
data_list[[length(data_list) + 1]] <- ruv_data
n_data <- length(data_list)
# Perform tSNE on the data
tsne_list <- map(data_list, function(x) compute_tsne(x, 5000))
##   Running t-SNE...with seed 42  DONE
##   Running t-SNE...with seed 42  DONE
##   Running t-SNE...with seed 42  DONE

PCA

plot_pca_samp <- map(data_list, ~plot_scpca_samp(., N = 2000))
plot_pca_samp <- flatten(plot_pca_samp)
plot_pca_samp <- add_titles(plot_pca_samp, rep(titles, each = 3))
ggarrange(plotlist = plot_pca_samp, ncol = 3, nrow = 3, common.legend = TRUE, legend = "right")

t-SNE

t-SNE coloured by sample

plot_tsne_samp <- map(tsne_list, plot_tsne_sample)
plot_tsne_samp <- add_titles(plot_tsne_samp, titles)
ggarrange(plotlist = plot_tsne_samp, ncol = 2, nrow = 2, common.legend = TRUE, legend = "right")

F statistics

plot_Ftsne <- map(tsne_list, tsne_F_stats)
plot_Ftsne <- flatten(plot_Ftsne)
# Note that the axes on the plots are not the same
ggarrange(plotlist = plot_Ftsne, ncol = 2, nrow = 3)

t-SNE coloured by cluster

plot_tsne_clus <- map(tsne_list, plot_tsne_cluster)
plot_tsne_clus <- add_titles(plot_tsne_clus, titles)
ggarrange(plotlist = plot_tsne_clus, ncol = 2, nrow = 2)

Cluster Frequency

clus_freq_plot <- map(data_list, plot_cluster_freq)
clus_freq_plot <- flatten(clus_freq_plot)
ggarrange(plotlist = clus_freq_plot, ncol = 2, nrow = 3)

Median marker intensity

plot_median_exprs <- map(data_list, plot_median_exprs)
plot_median_exprs <- add_titles(plot_median_exprs, titles)
ggarrange(plotlist = plot_median_exprs, ncol = 2, nrow = 2, legend = "bottom")

Marker Densities

marker_densities <- map(data_list, plot_marker_densities)
marker_densities <- add_titles(marker_densities, titles)
ggarrange(plotlist = marker_densities, ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")

Cluster Matching

clust_match_plot <- plot_cluster_match(data_list[[1]], data_list[[3]])
clust_match_plot <- clust_match_plot + labs(x = titles[[1]], y = titles[[3]])
clust_match_plot